# install.packages(c("snow", "numDeriv"))
library(snow) # For running on MPI server (or locally) in parallel 
library(numDeriv) # If running on MPI server, install on server also

# Location of Functions.R
workingDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"
# Location to output csvs from simulations
outputDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"

setwd(workingDirectory)
source("Functions.R")

# NOTE: Set the wallclock on compute server to 24 hours for each set of
# 1000 simulations with cores set to 64.

study3B <- function(n) {
  library(numDeriv)

  J <- 4 # Number of control items
  x <- as.matrix(rep(1, n)) # Intercept-only model

  models <- list()
  proportions <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

  for(i in 1:9) {
    y <- rbinom(n, J, 0.5) # i.e. plogis(0)
    treat <- sample(rep(c(0, 1), each = n / 2)) # Half treatment, half control
    true.belief <- rbinom(n, 1, 0.5) # 0.5 proportion of those holding sensitive
    y[treat == 1] <- y[treat == 1] + true.belief[treat == 1]

    liar <- direct <- rep(0, length(y))
    liar[true.belief == 1] <- rbinom(sum(true.belief == 1), 1, prob = proportions[i])
    direct[liar != 1 & true.belief == 1] <- 1

    models[[i]] <- listExperiment(y ~ 1,
                                  data = data.frame(y, treat, direct),
                                  treatment = "treat",
                                  J = J,
                                  direct = "direct",
                                  n.runs = 1,
                                  sensitive.response = 1,
                                  include.misreport.treatment = FALSE,
                                  verbose = FALSE,
                                  control.constrained = "partial")

  }

  Out <- data.frame(par.control.intercept = sapply(1:9, function(x) models[[x]]$par.control["(Intercept)"]),
                    se.control.intercept = sapply(1:9, function(x) models[[x]]$se.control["(Intercept)"]),

                    par.control.z = sapply(1:9, function(x) models[[x]]$par.control["Z == 1"]),
                    se.control.z = sapply(1:9, function(x) models[[x]]$se.control["Z == 1"]),

                    par.sensitive = sapply(1:9, function(x) models[[x]]$par.sensitive["(Intercept)"]),
                    se.sensitive = sapply(1:9, function(x) models[[x]]$se.sensitive["(Intercept)"]),

                    par.misreport = sapply(1:9, function(x) models[[x]]$par.misreport["(Intercept)"]),
                    se.misreport = sapply(1:9, function(x) models[[x]]$se.misreport["(Intercept)"]),

                    proportion = proportions,
                    n = n)

  return(Out)
}

n_cores <- as.numeric(Sys.getenv("MOAB_PROCCOUNT"))
CL <- makeCluster(n_cores, type = "MPI")
# If replicating on personal computer instead of MPI cluster:
# n_cores <- 4 # Number of threads/cores on which to run simulations
# CL <- makeCluster(n_cores, type = "SOCK")


# Note that this file was run multiple times on an MPI cluster to generate
# more than the 1000 simulations listed here. Output includes time-stamped
# file names to permit multiple runs and csvs which are then combined
# in the file SimStudyGraphs.R
n_sims <- 1000

# Export necessary functions to cluster
clusterExport(CL, list("logAdd", "mstepMisreport", "mstepSensitive",
                       "mstepControl", "mstepOutcome",
                       "estep", "listExperiment"),
              envir = environment())

# Run simluations
sim.out <- parLapply(CL, rep(c(2000, 3000, 5000), n_sims), study3B)

# Increase Sys.time() accuracy to 6 digits to avoid filename overlap
options(digits.secs = 6)
write.csv(do.call(rbind, sim.out),
          paste0(outputDirectory, "/Sim3B-", gsub(":|\\.", "-", Sys.time()), ".csv"), row.names = FALSE)

stopCluster(CL)
